* Start from an empty session, disable output paging, and close any log
* left open by an earlier run (capture: no error if none is open).
clear all
set more off
capture log close

********************************************************************************
***** Project: The Short and Long Term Effects of In-Person Performance Feedback
********************************************************************************
***** A. R. Soetevent & G. J. Romensen
********************************************************************************
***** Descriptive Statistics Coached and Uncoached Drivers
********************************************************************************
***** WARNING: RUNS FOR AGES
********************************************************************************
***** Latest update: 30-09-2024
********************************************************************************
* Root folder of the replication package; edit this line to match your machine.
* All paths use forward slashes: Stata accepts them on every platform, and a
* forward slash can never be read as an escape in front of a macro reference.
global filepath "C:/JPEMicReplication"
* Output folder for tables and graphs.
global paperpath "$filepath/TablesGraphs"
* Text log of this run; overwritten on every run.
log using "$filepath/Logs/X01Coachvsnocoachdescriptives.log", replace

* Load the main analysis data set.
use "$filepath/DEPO/DataMainAnalysisDEPO.dta"

rename bustype bustypes

* Region indicators: 1 when regio equals the region code, 0 otherwise
generate byte ZH = (regio == "ZH")
generate byte FR = (regio == "FR")


** Keep FR observations only (this removes the ZH data) **
drop if FR != 1
*  Drop months with imperfect tracking by coaches: everything after 30 April 2016
*  (rows with a missing datum are dropped too, since missing compares as larger)
drop if datum > date("30-4-2016", "DMY")


** Distinguish between Leeuwarden "Stad" and "Streek" drivers
* Many-to-one merge on driver number and location; the path uses forward
* slashes so that it resolves on every platform, consistent with the log path.
merge m:1 chauf_nr_rug rndlocid using "$filepath/DEPO/driversLWcityruralDEPO.dta"
drop _merge
* Drivers marked "Leeuwarden Stad" get their own location code, 181.
replace rndlocid = 181 if ves_naam_chauf=="Leeuwarden Stad"
drop ves_naam_chauf

* Drop eco-coaches [chauf_nr_rug: randomly generated depository numbers!]
* One drop per listed driver number.
foreach coach_id of numlist 939 1404 519 1286 1610 531 {
	drop if chauf_nr_rug == `coach_id'
}

** Keep baseline data only: rows with postannouncement equal to 0
** (rows where postannouncement is missing are removed as well)
drop if postannouncement != 0


/*** Determine the set of observations used for the analysis ***/
* Condition shared by both flags: the four trip-level controls are all observed.
* The text is spliced into the two expressions below.
local ctrl_seen "geplande_ritafstand!=. & lnovcheckins!=. & punctuality!=. & aantal_haltes!=."

* regobsfuel = 1 when the controls and dep_fueleconomyLpKM are non-missing, else 0
generate byte regobsfuel = `ctrl_seen' & dep_fueleconomyLpKM!=.

* regobsabc = 1 when the controls and dep_acceleratie, dep_bochten and dep_rem
* are non-missing, else 0
generate byte regobsabc = `ctrl_seen' & dep_acceleratie!=. & dep_bochten!=. & dep_rem!=.

****************************************
*** A. Create global list of covariates
****************************************
* Outcome variables (stems; the data columns carry a dep_ prefix)
global abcd "acceleratie rem bochten fueleconomyLpKM"

* Explanatory variables
	* Planned trip distance divided by 1000
	generate triplength = geplande_ritafstand/1000
	* bustype1-bustype3: 1 when the vehicle description contains the given text, else 0
	local type_no = 0
	foreach maker in VDL INTOURO IRISBUS {
		local ++type_no
		generate bustype`type_no' = strpos(voertuig_omschrijving, "`maker'") > 0
	}

* Covariate list, and the same list with the base_ prefix
global covExogenous "gebjaar jaardienst fulltimer geslacht punctuality triplength ovcheckins aantal_haltes ochtendspits avondspits weekendrit uitleenrit vakantierit stadsrit schooltraject bustype1 bustype2 bustype3"
global covbaseExogenous "base_gebjaar base_jaardienst base_fulltimer base_geslacht base_punctuality base_triplength base_ovcheckins base_aantal_haltes base_ochtendspits base_avondspits base_weekendrit base_uitleenrit base_vakantierit base_stadsrit base_schooltraject base_bustype1 base_bustype2 base_bustype3"
** Original note: this is other covEnvironm than used in other do files: "geplande_ritafstand aantal_haltes" have been replaced by "aantal_haltespKM"
** NOTE(review): neither covEnvironm nor aantal_haltespKM appears in this file -- confirm whether the note is still current.


**********************************************************
*** Useful macros: distinct values of driver number, coachdatum_1 and location
levelsof chauf_nr_rug, local(level_drivers)
levelsof coachdatum_1, local(level_coachdatum)
tabulate rndlocid
levelsof rndlocid, local(level_loc)
**********************************************************



* Selection flag, set to 0 for every row at this point
generate byte selectie = 0

* Snapshot of the prepared data; reloaded further down for Table 1
tempfile temp
save `temp'

******************************************************************************
*** 2A. Determine baseline performance (pre-announcement) ABC + fuel economy:
******************************************************************************

** Keep only rows with regobsfuel equal to 1
** NOTE(review): this step was originally labelled "Drop all Irisbus observations [14.09.2018]",
** but the filter applied is regobsfuel -- confirm which description is intended.
replace selectie = regobsfuel
keep if selectie != 0

** Make temp file with some details on coaching dates, later merged with the
** driver-level files: one row per driver (the first row of each driver is kept)
tempfile additionalvars
preserve
	duplicates drop chauf_nr_rug, force
	quietly keep chauf_nr_rug rndlocid coachdatum_1
	save `additionalvars'
restore


** a. Baseline performance drivers: driver-level means of the dep_ outcomes
* Build the dep_ variable list from the outcome stems in the global abcd
local dep_outcomes
foreach outcome of global abcd {
	local dep_outcomes `dep_outcomes' dep_`outcome'
}
collapse (mean) `dep_outcomes', by(chauf_nr_rug)

* Switch the prefix from dep_ to base_
foreach outcome of global abcd {
	quietly rename dep_`outcome' base_`outcome'
}


* Attach location and coaching date; keep matched drivers only
merge 1:1 chauf_nr_rug using `additionalvars'
keep if _merge == 3
drop _merge

tempfile baseline
save `baseline'

* Coached = 1 when coachdatum_1 is on or before 30 April 2016; 0 when it is
* later or missing (a missing date never satisfies <=).
* The variable is created after the save, so it is not part of the baseline
* tempfile, and the data in memory are replaced by the next use command.
local coach_cutoff = date("30-4-2016", "DMY")
generate byte Coached = (coachdatum_1 <= `coach_cutoff')
label variable Coached "=1 if coached per 30-4-2016, 0 otherwise"


*********************************************************************************************
*** [TABLE 1 -- Summary Statistics Baseline Performance and Non-Performance Characteristics]
*********************************************************************************************

** Reload the snapshot stored in the temp tempfile
use `temp', clear

** The regobsfuel restriction is commented out in this section:
*replace selectie=regobsfuel 
*drop if selectie == 0

** a. Driver-level means of the covariates listed in the global covExogenous
collapse (mean) $covExogenous, by(chauf_nr_rug)

* Prefix every collapsed covariate with base_
foreach covar of global covExogenous {
	quietly rename `covar' base_`covar'
}


* Attach location and coaching date; keep matched drivers only
merge 1:1 chauf_nr_rug using `additionalvars'
quietly keep if _merge == 3
quietly drop _merge



* Coached = 1 when coachdatum_1 is on or before 30 April 2016; 0 when it is
* later or missing (a missing date never satisfies <=)
local coach_cutoff = date("30-4-2016", "DMY")
gen byte Coached=.
quietly replace Coached = (coachdatum_1 <= `coach_cutoff')
label variable Coached "=1 if coached per 30-4-2016, 0 otherwise"

* For each covariate: regress the driver mean on Coached and a full set of
* location indicators (no constant), then report margins at Coached 0 and 1
foreach covar of global covExogenous {
	display "base_`covar'"
	* Accounting for location differences in base levels:
	regress base_`covar' Coached ibn.rndlocid, noconstant
	margins, at(Coached == 0)
	margins, at(Coached == 1)
}


*****************************************************************************
***  [END Table 1]
*****************************************************************************

log close